enrichMiR

#' getTS
#'
#' @param species character object. Can be "human", "mouse" or "rat"
#'
#' @return TargetScan miRNA target dataframe with family information in metadata()
#'
getTS <- function(species=c("human","mouse","rat")){
  library(S4Vectors)
  species <- match.arg(species)
  
  # assign species ID
  spec <- switch( species,
                  human = 9606,
                  mouse = 10090,
                  rat = 10116 )
  
  
  # download TargetScan miRNA targeting dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/Summary_Counts.all_predictions.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("Summary_Counts.all_predictions.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  sub$score <- as.numeric(as.character(sub$'Cumulative weighted context++ score'))
    
  # generate TargetScan dataframe
  ts <- DataFrame(family = sub$'miRNA family',
                   rep.miRNA = sub$'Representative miRNA',
                   feature = sub$'Gene Symbol',
                   sites = sub$'Total num conserved sites',
                   score = as.numeric(as.character(sub$'Cumulative weighted context++ score'))
                   )
  ts <- DataFrame(
    aggregate(sub[,c("sites","score")], by=ts[,c("family","feature")], FUN=mean)
    )
  

  
  # download TargetScan miRNA families dataset
  tmp <- tempfile()
  download.file(
    "http://www.targetscan.org/vert_72/vert_72_data_download/miR_Family_Info.txt.zip", 
    tmp)
  unzip(tmp)
  full <- fread("miR_Family_Info.txt") #, sep = "\t", header = TRUE)
  
  # limit to selected species
  sub <- full[full$'Species ID' == spec,]
  
  fam <- sub$`Seed+m8`
  names(fam) <- sub$`MiRBase ID`
  
  # add family info to ts dataframe as attribute
  metadata(ts)$families <- fam
  
  # enrichMiR cant handle 0 values for sites feature
  return(ts[ts$sites!=0,])
}
tests <- c("areamir","overlap","michael","KS","KS2","MW")
# all tests except WO:
tests <- c("overlap","michael","mw","ks","ks2","areamir","areamir2")
# all tests
tests <- c("overlap","michael","wo","mw","ks","ks2","gsea","gseamod","modscore","modsites","areamir","areamir2")
tests <- NULL
cores <- 8

# run enrichMiR on all objects of dea.list (high runtime!)

## foreach: doesnt work
library(foreach)
library(doParallel)
# cl <- makeCluster(cores)
# registerDoParallel(cl, cores=cores)
# chunk.size <- length(dea.names)/cores
# test <- foreach(c=1:cores, .combine="rbind") %dopar% {
#   for(i in dea.names[((c-1)*chunk.size+1):(c*chunk.size)]){
#     lapply(names(props.all), FUN=function(j){
#       enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
#                 cleanNames=TRUE, tests=tests)
#       })
#     }
# }
#stopImplicitCluster()
# stopCluster(cl)

## foreach: Pierre's code (works, but 1 core)
res <- foreach( dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE) ) %dopar% {
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                }

## mclapply (works, but 1 core; all cores when reduced tests var)
## looks like GSEA, WO, mods are problematic
library(parallel)
# multicores on Linux
e.list <- mclapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  }, mc.cores = cores )
## mcmapply: inspired by Pierre's code
res <- mcmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, 
                             cleanNames=TRUE, tests=tests )
                  }, mc.cores = cores )


## bplapply: doesnt work at all
e.list <- bplapply(dea.names[1:4], FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr,
                cleanNames=TRUE, tests=tests)
      })
  }, BPPARAM = MulticoreParam(cores, progressbar = TRUE) )


## bpmapply: Pierre's code
res <- bpmapply(dea=dea.list, 
                TS=unlist(TS.list,recursive=FALSE), 
                BPPARAM=MulticoreParam(4, threshold="TRACE"), FUN=function(x){
                  enrichMiR( as.data.frame(dea), TS=TS, miRNA.expression=mirexpr, cleanNames=TRUE, tests=tests )
                  } )


## serial run
e.list <- lapply(dea.names, FUN=function(i){
  lapply(names(props.all), FUN=function(j){
      enrichMiR(DEA=as.data.frame(dea.list[[i]]), TS=TS.list[[i]][[j]], miRNA.expression=mirexpr, 
                cleanNames=TRUE, tests=tests)
      })
  })


# naming
names(e.list) <- dea.names
for(i in dea.names){
  names(e.list[[i]]) <- names(props.all)
}

# use this between parallel runs
gc(verbose = T)

Benchmarking

Plots

## Score analysis:  detPPV

## Score analysis:  FP.atFDR05

## Score analysis:  log10QDiff

## Score analysis:  TP.atFDR05

scores over the datasets

## $`miR-122`
## original       20       30       40       50 
##    0.851    0.851    0.851    0.851    0.780 
## 
## $`miR-138`
## original       20       30       40       50 
##    0.834    0.840    0.834    0.806    0.812 
## 
## $`miR-145`
## original       20       30       40       50 
##    0.802    0.754    0.730    0.683    0.546 
## 
## $`miR-184`
## original       20       30       40       50 
##    0.633    0.611    0.570    0.569    0.505 
## 
## $`miR-190a`
## original       20       30       40       50 
##    0.806    0.797    0.724    0.662    0.514 
## 
## $`miR-200b`
## original       20       30       40       50 
##    0.834    0.834    0.834    0.834    0.815 
## 
## $`miR-216a`
## original       20       30       40       50 
##    0.682    0.535    0.363    0.244    0.125 
## 
## $`miR-217`
## original       20       30       40       50 
##    0.770    0.752    0.771    0.638    0.498 
## 
## $`miR-219a`
## original       20       30       40       50 
##    0.826    0.851    0.843    0.834    0.831 
## 
## $`miR-375`
## original       20       30       40       50 
##    0.826    0.794    0.783    0.698    0.526 
## 
## $`miR-451a`
## original       20       30       40       50 
##    0.701    0.681    0.666    0.571    0.573

scores over methods

## $aREAmir
## original       20       30       40       50 
##    0.955    0.955    0.909    0.887    0.673 
## 
## $aREAmir2
## original       20       30       40       50 
##    0.955    0.955    0.830    0.778    0.614 
## 
## $EN.up
## original       20       30       40       50 
##    0.013    0.013    0.013    0.013    0.013 
## 
## $EN.down
## original       20       30       40       50 
##    1.000    0.949    0.933    0.815    0.639 
## 
## $EN.combined
## original       20       30       40       50 
##    1.000    0.949    0.934    0.869    0.758 
## 
## $wEN.up
## original       20       30       40       50 
##    0.006    0.006    0.007    0.007    0.007 
## 
## $wEN.down
## original       20       30       40       50 
##    1.000    1.000    0.980    0.977    0.898 
## 
## $wEN.combined
## original       20       30       40       50 
##    1.000    1.000    0.980    0.977    0.914 
## 
## $michael.up
## original       20       30       40       50 
##    0.006    0.007    0.007    0.007    0.007 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    1.000    0.965    0.937    0.892 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    1.000    0.970    0.907    0.807 
## 
## $MW
## original       20       30       40       50 
##    0.865    0.799    0.754    0.636    0.413 
## 
## $KS
## original       20       30       40       50 
##    0.716    0.695    0.688    0.577    0.381 
## 
## $KS2
## original       20       30       40       50 
##    1.000    0.985    0.955    0.892    0.857 
## 
## $GSEA
## original       20       30       40       50 
##    0.528    0.483    0.481    0.468    0.534 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.447    0.388    0.412    0.315    0.378 
## 
## $modSites
## original       20       30       40       50 
##    0.955    0.892    0.811    0.689    0.566 
## 
## $modScore
## original       20       30       40       50 
##    1.000    0.955    0.881    0.720    0.679 
## 
## $combTest.1
## original       20       30       40       50 
##    1.000    0.985    0.955    0.927    0.883 
## 
## $combTest.2
## original       20       30       40       50 
##    1.000    0.955    0.955    0.947    0.934

TP ratio at FDR 0.05 over methods

## $aREAmir
## original       20       30       40       50 
##    0.818    0.788    0.636    0.545    0.455 
## 
## $aREAmir2
## original       20       30       40       50 
##    0.818    0.667    0.515    0.485    0.364 
## 
## $EN.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $EN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $EN.combined
## original       20       30       40       50 
##     1.00     1.00     1.00     1.00     0.97 
## 
## $wEN.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $wEN.down
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $wEN.combined
## original       20       30       40       50 
##    1.000    1.000    1.000    1.000    0.939 
## 
## $michael.up
## original       20       30       40       50 
##        0        0        0        0        0 
## 
## $michael.down
## original       20       30       40       50 
##    1.000    1.000    1.000    0.970    0.818 
## 
## $michael.combined
## original       20       30       40       50 
##    1.000    1.000    1.000    0.939    0.758 
## 
## $MW
## original       20       30       40       50 
##    1.000    1.000    1.000    0.939    0.939 
## 
## $KS
## original       20       30       40       50 
##    1.000    1.000    1.000    1.000    0.939 
## 
## $KS2
## original       20       30       40       50 
##    0.909    0.848    0.818    0.818    0.667 
## 
## $GSEA
## original       20       30       40       50 
##    0.091    0.121    0.182    0.182    0.303 
## 
## $GSEAmodified
## original       20       30       40       50 
##    0.364    0.364    0.485    0.485    0.606 
## 
## $modSites
## original       20       30       40       50 
##    1.000    1.000    1.000    0.970    0.879 
## 
## $modScore
## original       20       30       40       50 
##    1.000    1.000    1.000    0.970    0.879 
## 
## $combTest.1
## original       20       30       40       50 
##        1        1        1        1        1 
## 
## $combTest.2
## original       20       30       40       50 
##     1.00     1.00     1.00     1.00     0.97